import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Enable inline figures when running under IPython/Jupyter. The bare
# `%matplotlib inline` magic is a SyntaxError in a plain Python interpreter,
# so it is invoked through the IPython API instead; `get_ipython` only exists
# inside an IPython session, hence the NameError guard for script runs.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
# Load the stock table, reading the 'code' column as text, and index by it.
df = pd.read_excel('stock.xlsx', dtype={'code': 'str'}).set_index('code')

# Show the row for a single stock code.
print(df.loc['002522'])

# Number of distinct industries, then number of distinct areas.
industries = df['industry'].unique()
print(len(industries))

areas = df['area'].unique()
print(len(areas))

# Row count per area, largest first.
per_area = df.groupby('area')['area'].count()
print(per_area.sort_values(ascending=False))

# First four characters of timeToMarket (presumably a YYYYMMDD listing date,
# so this is the listing year -- confirm against the data), then the number
# of non-null names per such key.
year = df['timeToMarket'].astype('str').str[:4]
yearnum = df.groupby(year)['name'].count()
print(yearnum)

# Use the SimHei font so the Chinese title renders, and disable the Unicode
# minus glyph so negative signs fall back to a plain hyphen.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Exclude the '0' bucket (rows whose timeToMarket stringifies to '0';
# presumably stocks without a listing date -- confirm against the data).
ipo_per_year = yearnum[yearnum.index != '0']
print(ipo_per_year)

# NOTE: the original chained `.plot(...)` onto the return value of `print`,
# which is None and raised AttributeError; plot the Series itself instead.
ipo_per_year.plot(fontsize=14, title='年IPO数量')
# Display the figure when run as a script (harmless with the inline backend).
plt.show()

# Plain mean of the 'pe' column over all rows.
print(df['pe'].mean())

# Mean of 'pe' restricted to rows where it is strictly positive.
positive_pe = df.loc[df['pe'] > 0, 'pe']
print(positive_pe.mean())

# Per-row weight stored as a new 'tvalue' column. Presumably esp is a
# quarterly earnings-per-share figure and this approximates total market
# value -- TODO confirm the column semantics.
df['tvalue'] = 4 * df['esp'] * df['pe'] * df['totals']

# 'pe' averaged with 'tvalue' as the weight.
weighted_pe_sum = np.sum(df['pe'] * df['tvalue'])
print(weighted_pe_sum / df['tvalue'].sum())

# Derive a 'board' key from the first two characters of the stock code.
# NOTE: the original line compared (`==`) against a 'board' column that is
# never created in this file, which raises KeyError and leaves the groupby
# below without its key; the assignment is restored here.
# NOTE(review): assumes the spreadsheet has no 'board' column of its own.
df['board'] = df.index.str[:2]

# Per board: mean of 'pe' and the count of non-null 'pe' values, with the
# result columns labelled by the given names.
print(df.groupby('board')['pe'].agg([('pe均值', 'mean'), ('股票数', 'count')]))
